/* -------------------------------------------------------------------CPS_taxsim_createextract.do
This file creates the extract for simulating taxes in the CPS.

Last updated: 5/29/13

// --------------------------------------------------------------------------------------------
*/

forvalues y=1986/2012 {
qui {

	// Load the March CPS file for survey year `y' and stamp each record with it
	use CPS_mar`y', clear
	quietly generate yof = `y'

	// Bring differently named identifiers onto one naming scheme. Each rename is
	// wrapped in -capture-, so a file that lacks the old name (or already holds
	// the new one) passes through unchanged. Both a_age and peage map to age;
	// whichever is renamed first wins and the second attempt is ignored.
	foreach pair in "hhseq h_seq" "pppos ppos" "a_age age" "peage age" ///
			"a_sex sex" "a_famrel relhead" {
		capture rename `pair'
	}
// -------------------------------------------------- 1. Bring in the cell means

	// Attach the *_tc variables from the cell-means file; records that exist
	// only in the cell-means file are discarded.
	capture drop _merge
	sort yof h_seq ppos
	quietly merge 1:1 yof h_seq ppos using CPS_cellmeans.dta // copied from main directory $ddCPS
	tabulate yof _merge
	quietly drop if _merge==2

	// Fill the *_tc values for individuals without one: wherever the *_tc
	// variable is (system) missing and the same-named variable without the
	// "_tc" suffix is not, copy the latter across. The set of variables
	// differs between the <=1987 files and the later ones.
	if `y'<=1987 {
		local tc_patterns "i5*_tc pinctot_tc"
	}
	else {
		local tc_patterns "*_*_tc ptotval_tc"
	}
	foreach tcvar of varlist `tc_patterns' {
		local rawvar : subinstr local tcvar "_tc" "", all
		quietly replace `tcvar' = `rawvar' if `tcvar'==. & `rawvar'<.
	}
	drop _merge
	/* DO WE CARE ABOUT THIS?
	// Generate cell-mean consistent household income
	qui egen htotval_tc=total(ptotval_tc) if yof>1987, by(yof h_seq)
	qui egen hinctot_tc=total(pinctot_tc) if yof<1988, by(yof h_seq) 
	*/

	
// -------------------------------------------------- 2. Non-income variables
	
	
	// Bring in the tax units. Every person in memory must be found in the key
	// file (a master-only record is an error); records that exist only in the
	// key file are discarded.
	// NOTE(review): presumably this file supplies taxunit (and possibly
	// married_now), which are used below but not created in this do-file - confirm.
	capture drop _merge
	merge 1:1 yof h_seq ppos using CPS_taxsim_tukey
	assert inlist(_merge, 2, 3)
	drop if _merge != 3
	drop _merge

	// Identify filers/dependents - step 0: put the income variables used by the
	// filer-status rules on a common footing.
	if `y'<=1987 {
		// Only 92.35% of self-employment income is counted (original author's
		// note: "you only pay taxes on 92.35%")
		quietly replace i51b = .9235*i51b
	}
	else {
		// From the 1988 file on, total income is called ptotval; expose its
		// *_tc version under the pre-1988 name so later code can use one name.
		drop pinctot_tc
		rename ptotval_tc pinctot_tc

		// ern_val_tc is added to the wage (ern_srce==1), self-employment
		// (ern_srce==2) or farm (ern_srce==3) component, depending on ern_srce.
		local src_code = 0
		foreach stub in ws se frm {
			local ++src_code
			quietly replace `stub'_val_tc = `stub'_val_tc + ern_val_tc if ern_srce==`src_code'
		}

		// Scale self-employment income here so it does not have to be handled
		// again later. Original author's note: the true se_val is already in
		// household income.
		quietly replace se_val_tc = .9235*se_val_tc
	}
	// ---- Filer-status assignment ------------------------------------------
	// Classifies every person within a tax unit (yof h_seq taxunit). The rules
	// run in sequence and, unless noted, only touch records that are still
	// unassigned (-9), so the statement order below matters.
	// Income sign tests name pinctot_tc explicitly: the bare name "pinctot"
	// is the raw variable in the <=1987 files but only resolves (through
	// variable abbreviation) to pinctot_tc in later files.
	// NOTE(review): married_now and taxunit are not created in this do-file;
	// presumably they arrive with the CPS extract / tax-unit key - confirm.
	cap drop filer_status
	qui gen filer_status=-9
	#d ;
	label define filer_status
		-9 "Unassigned"
		 1 "Married, filing jointly"
		 2 "Single individual"
		 3 "Head of household (single)"
		 4 "Dependent filer"
		 5 "Dependent non-filer"
		 6 "Other non-filer";
	#d cr
	label val filer_status filer_status
	// If you are married, you are filing jointly with your spouse
	qui replace filer_status = 1 if married_now==1
		// Everyone else in that tax unit is a dependent
		cap drop temp1
		cap drop temp2
		qui gen temp1 = filer_status==1
		qui egen temp2 = max(temp1), by(yof h_seq taxunit)
	qui replace filer_status = 4 if filer_status==-9&temp2==1&pinctot_tc>0
	qui replace filer_status = 5 if filer_status==-9&temp2==1&pinctot_tc<=0
		drop temp*
	// Tax units of 1 are all single
		cap drop temp_N
		bys yof h_seq taxunit: gen temp_N=_N
	qui replace filer_status=2 if temp_N==1&filer_status==-9
		drop temp*		
	// Everyone else: highest (non-zero) earner is the filer, everyone else is dependent
		cap drop temp_maxinc
		cap drop temp_nonzero
		qui gen temp_nonzero = pinctot_tc if pinctot_tc!=0
		qui egen temp_maxinc = max(temp_nonzero), by(yof h_seq taxunit)
		cap drop temp_mostinc 
		qui gen temp_mostinc = pinctot_tc==temp_maxinc
		cap drop temp_maxage
		qui egen temp_maxage = max(age), by(yof h_seq taxunit)
		cap drop temp_oldest 
		qui gen temp_oldest = age==temp_maxage
		compare temp_mostinc temp_oldest if filer_status==-9 // almost always the same
	// A head must be both the top earner and the oldest person in the unit
	qui replace filer_status=3 if temp_mostinc==1&temp_oldest==1&filer_status==-9
	// If there are still multiple hh's, it should probably be a result of 2 same ages both with no income
		cap drop temp1
		cap drop temp2
		qui gen temp1 = filer_status==3
		qui egen temp2 = total(temp1), by(yof h_seq taxunit)
		* tab temp2
		cap assert pinctot_tc==0 if temp1==1&temp2>1
		if _rc!=0 {
			noisily {
				di "Multiple head problem is not the result of multiple 0's"
				list h_seq if temp1==1&temp2>1&pinctot_tc!=0
			}
			// In these cases, just give it to the first person (there's about 1/year)
			// - This appears to actually be the same thing as the duplicate observation
			// problem we ran into in the matching process
			// temp3 flags the tied heads and temp5 is the lowest ppos among them.
			// temp4 is taken over the tied heads themselves; taking it over units
			// with exactly one head (temp2==1) left temp5 missing in every tied
			// unit, so the replace below could never fire.
			cap drop temp3
			cap drop temp4
			cap drop temp5
			qui gen temp3 = temp1==1&temp2>1
			qui gen temp4 = ppos if temp3==1
			qui egen temp5 = min(temp4), by(yof h_seq taxunit)
			qui replace filer_status=4 if temp3==1&ppos==temp5
/*			if `y'==1987 { DON'T DO THIS CASE BY CASE ANYMORE
				replace filer_status=4 if h_seq==41412&ppos==2
			}
			if `y'==1988 {
				replace filer_status=4 if h_seq==10511&ppos==42
			} 
			*/
		}
		// Every tied head that is still coded 3 becomes an "other non-filer" (6).
		// (The first-ppos helper that used to be recomputed here was never used
		// and has been removed.)
	qui replace filer_status=6 if temp1==1&temp2>1&filer_status==3
	// Next we assign the dependents of HH heads
		// Now recalculate 3's
		cap drop temp1
		cap drop temp2
		qui gen temp1 = filer_status==3
		qui egen temp2 = total(temp1), by(yof h_seq taxunit)
		* tab temp2
		// Everyone in a 1 filer household who is not the filer is a dependent
	qui replace filer_status=4 if temp1==0&temp2==1&filer_status==-9&pinctot_tc>0
	qui replace filer_status=5 if temp1==0&temp2==1&filer_status==-9&pinctot_tc<=0
	// If everyone in the TU has 0 income, then they're all non-filers
	// (this overrides any status assigned above)
		cap drop temp_zeroinc
		cap drop temp_Nzeros
		cap drop temp_N
		cap drop temp_allzeros
		qui gen temp_zeroinc = pinctot_tc==0
		qui egen temp_Nzeros = total(temp_zeroinc), by(yof h_seq taxunit)
		bys yof h_seq taxunit: gen temp_N = _N
		qui gen temp_allzeros=temp_N==temp_Nzeros
	qui replace filer_status=6 if temp_allzeros==1
// At this point nearly everyone who is unassigned has no filer in the HH
// They get the following treatment:
	cap drop temp_filer
	qui gen temp_filer = inlist(filer_status,1,2,3)
	cap drop temp_Nfilers
	qui egen temp_Nfilers=total(temp_filer), by(yof h_seq taxunit)
	// No income? You are a non-filer
	qui replace filer_status=6 if pinctot_tc==0&filer_status==-9
	// Whomever has the highest income becomes the filer 
	// and everyone else in the TU is a dependent
	cap drop temp_Nhighe
	qui egen temp_Nhighe=total(temp_mostinc), by(yof h_seq taxunit)
	qui replace filer_status=3 if temp_Nhighe==1&temp_mostinc==1&filer_status==-9&temp_Nfilers==0
	// The cases left are children with parents present, parents have lower incomes, children have 
	// some kind of support (social security)
	cap drop temp_filer
	qui gen temp_filer = inlist(filer_status,1,2,3)
	cap drop temp_Nfilers
	qui egen temp_Nfilers=total(temp_filer), by(yof h_seq taxunit)
	cap drop temp_Noldest
	qui egen temp_Noldest=total(temp_oldest), by(yof h_seq taxunit)
	qui replace filer_status=3 if temp_Noldest==1&temp_oldest==1&filer_status==-9&temp_Nfilers==0
// Now we have some stray clean up
	cap drop temp_filer
	qui gen temp_filer = inlist(filer_status,1,2,3)
	cap drop temp_Nfilers
	qui egen temp_Nfilers=total(temp_filer), by(yof h_seq taxunit)
	tab filer_status temp_Nfilers, mi
	// Assign dependents now that we have primary filers in new HHs:
	// anyone still unassigned becomes a dependent filer
	qui replace filer_status=4 if filer_status==-9
	local ym1 = `y' - 1

	display "===================================="
	display " Tax year `ym1'"
	display "===================================="
	tabulate filer_status

	// One log line per year: the tax year followed by the percentage of
	// persons in each filer_status category, in ascending category order.
	// Only this block is un-silenced; the displays above stay quiet inside
	// the enclosing -qui- block.
	noisily {
		display "`ym1'", _continue
		quietly levelsof filer_status, local(status_codes)
		foreach code of local status_codes {
			capture drop temp_fs
			quietly generate temp_fs = (filer_status == `code')
			quietly summarize temp_fs, meanonly
			local pct = round(r(mean)*100, .01)
			display "`pct'", _continue
		}
		display ""
	}



// -------------------------------------------------- 3. Individual income sources:


	// Builds the person-level (*_i) income items. Each item is taken from the
	// i5* variables for the <=1987 files and from the *_val_tc variables for
	// later files.

	// Wages
	// Self-employment and farm income count as wages only when neither is
	// negative; if either is negative, both are moved to other property income.
	cap drop wages_i
	qui gen wages_i=.
	if `y'<=1987 {
		qui replace wages_i = i51a + i51b + i51c if i51b>=0&i51c>=0
		qui replace wages_i = i51a if i51b<0|i51c<0
	}
	else {
		qui replace wages_i = ws_val_tc+se_val_tc+frm_val_tc if se_val_tc>=0&frm_val_tc>=0
		qui replace wages_i = ws_val_tc if se_val_tc<0|frm_val_tc<0
	}
	label var wages_i "Individual wages"
	
	// Dividends
	cap drop dividends_i
	qui gen dividends_i=.
	if `y'<=1987 qui replace dividends_i=i53c // this also includes rental income until 1988
	else qui replace dividends_i=div_val_tc
	label var dividends_i "Individual dividend income"
	
	// Other property income
	// - post 1987 we have rent and interest separate from dividends
	// - receives the self-employment and farm amounts whenever either is negative
	cap drop otherprop_i
	if `y'<=1987 {
		qui gen otherprop_i=i53b
		qui replace otherprop_i=otherprop_i+i51b+i51c if i51b<0|i51c<0
	}
	else {
		qui gen otherprop_i=int_val_tc + rnt_val_tc
		qui replace otherprop_i=otherprop_i+se_val_tc+frm_val_tc if se_val_tc<0|frm_val_tc<0
	}
	label var otherprop_i "Individual other property income"
	
	// Pensions
	cap drop pensions_i
	if `y'<=1987 qui gen pensions_i=i53e 
	else qui gen pensions_i=ret_val1_tc + ret_val2_tc
	label var pensions_i "Individual pension income"
	
	// SS
	// Post 1987 we also have survivor and disability benefits
	cap drop gssi_i
	if `y'<=1987 qui gen gssi_i = i52a+i52b // Social security + SSI
	else {
		qui gen gssi_i = ss_val_tc+ssi_val_tc+sur_val1_tc+sur_val2_tc+dis_val1_tc+dis_val2_tc
	}
	label var gssi_i "Individual gross SS"
	
	// Individual transfer income
	// The second -gen- needs the -else-: without it transfers_i was generated
	// twice for the <=1987 files and the run stopped with "already defined".
	cap drop transfers_i
	if `y'<=1987 qui gen transfers_i=i53a
	else qui gen transfers_i=paw_val_tc+wc_val_tc+vet_val_tc+csp_val_tc
	
	// UI - pre 1988, this also has Vet and Worker's comp
	cap drop ui_i
	if `y'<=1987 qui gen ui_i=i53d
	else qui gen ui_i=uc_val_tc
	/* Just like Judith Scott-Clayton, we ignore
	- Educational assistance
	- Alimony
	- Contrib/financial asst
	- Misc (other) income
	*/
	
	// Dummies for SE income (+/-)
	// Uses the same year-specific variables as the wage definition above
	// (i51b/i51c up to 1987, se_val_tc/frm_val_tc afterwards).
	// NOTE(review): the earlier code referenced i51b/i51c in every year;
	// confirm that the post-1987 files really have no usable i51b/i51c.
	cap drop selfemp
	cap drop selfemp2
	if `y'<=1987 {
		qui gen selfemp=i51b!=0
		qui gen selfemp2=i51b!=0|i51c!=0 // also farm income
	}
	else {
		qui gen selfemp=se_val_tc!=0
		qui gen selfemp2=se_val_tc!=0|frm_val_tc!=0 // also farm income
	}
	

// -------------------------------------------------- 4. TU level income coding
	
	// 2nd tax unit identifier (this one separates out dependent filers)
	// - use taxunit to count dependents, taxunit2 to collapse
	// Everyone keeps their taxunit code except dependent filers (status 4),
	// who each get a code of their own: 100 + 100*taxunit + position.
	// The former taxunit+100+position was not unique within a household
	// (taxunit 1/position 2 and taxunit 2/position 1 both gave 103), which
	// pooled unrelated dependent filers in every later by(... taxunit2) step.
	// Positions are taken in ppos order so the codes are the same on every run.
	// Assumes integer taxunit codes below 101 and fewer than 100 persons per
	// tax unit - TODO confirm.
	cap drop temp1
	cap drop temp2
	cap drop taxunit2
	qui gen temp1 = taxunit
	bys yof h_seq taxunit (ppos): gen temp2=_n
	qui replace temp1 = 100 + 100*taxunit + temp2 if filer_status==4
	rename temp1 taxunit2
														
														   
	
	// Distribute wages of primary and secondary earner.
	//   pwages: own wages for filers (status 1-4); for joint filers, the
	//           larger of the joint filers' wages in the tax unit.
	//   swages: the smaller of the joint filers' wages; 0 for everyone else.
	// NOTE(review): if a tax unit holds only one status-1 person, min and max
	// coincide and the same wages enter both pwages and swages - confirm that
	// joint filers always come in pairs.
	foreach scratch in pwages swages {
		capture drop `scratch'
	}
	quietly generate pwages = wages_i if inlist(filer_status,1,2,3,4)
	foreach scratch in temp1 temp2 temp3 {
		capture drop `scratch'
	}
	quietly generate temp1 = wages_i if filer_status==1
	quietly egen temp2 = min(temp1), by(yof h_seq taxunit)
	quietly egen temp3 = max(temp1), by(yof h_seq taxunit)
	quietly replace pwages = temp3 if filer_status==1
	quietly generate swages = cond(filer_status==1, temp2, 0)
	
	
	// Roll each person-level item up to the filing unit (yof h_seq taxunit2).
	// The unit-level variable drops the "_i" suffix. Dependent filers
	// (status 4) keep their own amount.
	// NOTE(review): temp holds the item for status 1-3 only but is never
	// used - the total runs over everyone in taxunit2. Confirm whether
	// total(temp) was intended.
	loc vl1 "dividends_i otherprop_i pensions_i gssi_i transfers_i ui_i"
	foreach src of local vl1 {
		local unitvar : subinstr local src "_i" "", all
		capture drop temp
		quietly generate temp = `src' if inlist(filer_status,1,2,3)
		quietly egen `unitvar' = total(`src'), by(yof h_seq taxunit2)
		quietly replace `unitvar' = `src' if filer_status==4
	}

	capture drop alimony
	quietly egen alimony = total(alm_val_tc), by(yof h_seq taxunit2)

	// We're going to let taxsim create the AGI. The measure below is not the
	// official one, just something to use for the imputation
	// (transfers and ui are left out).
	capture drop agi
	quietly generate agi = pwages + swages + dividends + otherprop + pensions + gssi + alimony

	// Clear earlier bracket variables and remember the largest AGI, which
	// closes the open-ended top bracket in the recodes that follow
	capture drop y_group
	capture drop y_benchmark
	quietly summarize agi, meanonly
	loc magi = r(max)
	/*	   
	// make a separate recode for benchmarking against CE/IRS 2009 ONLY
	recode agi (0/4999=0) (5000/9999=1) (10000/14999=2) (15000/19999=3) ///
			(20000/29999=4) (30000/39999=5) (40000/49999=6) (50000/69999=7) (70000/79999=8) ///
			(80000/99999=9) (100000/119999=10) (120000/149999=11) (150000/`magi'=12), gen(y_benchmark)
			
	
	
	// Create table
	 We only do this for FY 2009 because the groups are specific to the BLS table
	preserve
		forvalues b=0/12 {
			cap drop yb_`b'
			qui gen yb_`b' = y_benchmark==`b' if inlist(filer_status,1,2,3,4)
		}
		qui replace yb_0=. if agi<=0
		keep if inlist(filer_status,1,2,3,4)
		collapse (mean) yb_* , by(yof h_seq taxunit2)
		cap drop temp1
		qui egen temp1=rowtotal(yb_*)
		drop if temp1!=1
		list yof h_seq taxunit2 yb* if _n<100
		tabstat yb*
		* tab yb_3 yb_4
	restore
	*/ 
	// Bracket AGI for the benchmark table (14 brackets, 0-13; the top one is
	// closed with the largest AGI in the data).
	// NOTE(review): the bounds are whole numbers, so a fractional AGI such as
	// 4999.5 matches no rule; such units end up with no bracket flag set and
	// are dropped from the table below. Confirm whether that matters
	// (se_val_tc is scaled by .9235 earlier in this loop).
	recode agi (0/4999=0) (5000/9999=1) (10000/14999=2) (15000/19999=3) ///
			(20000/24999=4) (25000/29999=5) (30000/39999=6) (40000/49999=7) (50000/74999=8) ///
			(75000/99999=9) (100000/199999=10) (200000/499999=11) (500000/999999=12) (1000000/`magi'=13), gen(y_benchmark)

	// Table: share of filing units (taxunit2, filer statuses 1-4) per bracket.
	// Everything inside preserve/restore is thrown away afterwards.
	preserve
		forvalues bin = 0/13 {
			capture drop yb_`bin'
			quietly generate yb_`bin' = (y_benchmark==`bin') if inlist(filer_status,1,2,3,4)
		}
		// Non-positive AGI is not counted in the bottom bracket
		quietly replace yb_0 = . if agi<=0
		keep if inlist(filer_status,1,2,3,4)
		collapse (mean) yb_*, by(yof h_seq taxunit2)
		// Keep units whose bracket flags sum to exactly one
		quietly egen n_flagged = rowtotal(yb_*)
		keep if n_flagged==1
		noisily display "AGI breakdown"
		list yof h_seq taxunit2 yb* if _n<100
		noisily tabstat yb*
		* tab yb_3 yb_4
	restore
	
	// Create a new y_group for imputation.
	// A helper observation (ppos missing, year 1999) is appended so that the
	// price index for 1999 is available; the index is then rescaled to
	// 1999 = 1, the helper row is removed, and AGI is divided by the index
	// and rounded to a whole number.
	// NOTE(review): an earlier comment here said "all incomes are adjusted to
	// 2009", but the code rebases on 1999 - confirm the intended base year.
	// NOTE(review): getcpi is not defined in this file; presumably it fills
	// cpi with a price index for each value of year - confirm.
	capture drop cpi
	set obs `=_N+1'
	capture drop year
	quietly generate year = cond(ppos==., 1999, `y'-1) // y is yof
	getcpi, year(year) gen(cpi)
	quietly summarize cpi if year==1999, meanonly
	quietly replace cpi = cpi/r(mean)
	drop if ppos==.
	quietly replace agi = round(agi/cpi)

	// 15 brackets (0-14); the top one is closed with the largest AGI
	capture drop y_group
	quietly summarize agi, meanonly
	local magi = r(max)
	recode agi  (0/9999=0) (10000/19999=1) (20000/29999=2) (30000/39999=3) ///
		   (40000/49999=4) (50000/59999=5) (60000/69999=6) (70000/79999=7) ///
		   (80000/89999=8) (90000/99999=9) (100000/124999=10) (125000/149999=11) ///
		   (150000/174999=12) (175000/199999=13) (200000/`magi'=14), gen(y_group)

// -------------------------------------------------- 5. Impute itemized deductions	

	
	// Prep the two lookup tables used by the imputation, one row per y_group:
	//   share       -> share_itemizers (saved in tempfile `share')
	//   item_adgrin -> share_agi       (saved in tempfile `sh_agi')
	// Both files are keyed by _rowname (a year stored as text) with one S*
	// column per bracket. Years after 2006 bring forward the 2006 row.
	local yk = min(`y', 2006)
	tempfile share sh_agi
	foreach spec in "share share_itemizers share" "item_adgrin share_agi sh_agi" {
		local srcfile : word 1 of `spec'
		local valname : word 2 of `spec'
		local outname : word 3 of `spec'
		preserve
			use `srcfile', clear
			capture drop yr
			quietly generate yr = real(_rowname)
			drop _rowname
			quietly keep if yr==`yk'
			quietly replace yr = `y'
			// S<j> columns become rows; j is the bracket number
			reshape long S, i(yr) j(temp)
			rename S `valname'
			rename temp y_group
			save ``outname'', replace
		restore
	}

	// Get imputed values for itemized deductions!
	// For each filing unit, draw a uniform random number; the unit itemizes
	// when the draw is at or below its bracket's share_itemizers, in which
	// case itemized_share is the bracket's share_agi (otherwise 0).
	// NOTE(review): no seed is set in this file, so the draws presumably
	// differ from run to run - confirm whether a -set seed- lives elsewhere.
	preserve
		keep if inlist(filer_status,1,2,3,4)
		// Only keep one of the married folks (they only file 1 return)
		bysort h_seq taxunit2: keep if _n==1

		// Bring in the share data
		capture drop _merge
		merge m:1 y_group using `share'
		tabulate y_group _merge
		list if _merge==2
		* bys _m: summ y_group, d
		* bys _m: summ agi, d
		keep if _merge==3 // this is just folks who make less than 10000
		drop _merge

		// Bring in the prop income data
		capture drop _merge
		merge m:1 y_group using `sh_agi'
		tabulate y_group _merge
		keep if _merge==3
		drop _merge

		// First draw only shuffles the row order; second draw decides
		capture drop u_order
		quietly generate u_order = runiform()
		sort u_order
		capture drop u_draw
		quietly generate u_draw = runiform()

		capture drop itemizer
		quietly generate itemizer = (u_draw <= share_itemizers)
		summarize itemizer, detail
		capture drop itemized_share
		quietly generate itemized_share = cond(itemizer==1, share_agi, 0)
		bysort itemizer: summarize itemized_share, detail
		duplicates report h_seq taxunit2
		keep yof h_seq taxunit2 itemized_share
		tempfile item_deduc
		save `item_deduc', replace
	restore

	// Spread the unit-level result back over every person in the unit.
	// Anyone without a match must have AGI below 10000 (or no AGI at all).
	merge m:1 yof h_seq taxunit2 using `item_deduc'
	assert agi<10000 if _merge<3 & agi<.
	drop _merge

	
	
	
	// Put the agi back into current-year dollars and create a table for
	// benchmarking against SOI: for each AGI bracket, the mean imputed
	// deduction amount and the share of filing units that itemize.
	// Everything inside preserve/restore is discarded afterwards.
	// NOTE(review): an earlier comment here said "2009 dollars"; AGI was
	// divided by a 1999-based index above, so multiplying by the same index
	// returns current-year dollars - confirm this is the intended benchmark unit.
preserve
	cap drop cpi
	set obs `=_N+1'
	cap drop year
	qui gen year = `y'-1 // y is yof
	local ym1 = `y'-1
	// Helper observation supplies the 1999 index value
	qui replace year=1999 if ppos==.
	getcpi, year(year) gen(cpi)
	// Rebase on 1999 (the base used when AGI was deflated). Rebasing on the
	// data's own year made the index 1 for every real record, so the
	// adjustment did nothing.
	qui summ cpi if year==1999, mean
	qui replace cpi=cpi/r(mean)
	drop if ppos==.
	qui replace agi=agi*cpi
	qui summ agi
	// Create a new y_group for the table (top bracket closed with max AGI)
	local magi=r(max)
	cap drop y_group
	recode agi  (0/4999=0) (5000/9999=1) (10000/14999=2) (15000/19999=3) ///
		   (20000/24999=4) (25000/29999=5) (30000/39999=6) (40000/49999=7) ///
		   (50000/74999=8) (75000/99999=9) (100000/199999=10) (200000/499999=11) ///
		   (500000/999999=12) (1000000/1499999=13) (1500000/`magi'=14), gen(y_group)			   
	
		cap drop deduc_amt
		cap drop deduc_item
		// NOTE(review): units with itemized_share==0 keep a missing deduc_amt
		// (so they are left out of the mean) while units with a missing share
		// get 0 - confirm which treatment is intended.
		qui gen deduc_amt = itemized_share*agi if itemized_share>0&itemized_share<.
		qui replace deduc_amt=0 if itemized_share==.
		// A missing share compares as greater than 0, so exclude it explicitly
		qui gen deduc_item = itemized_share>0&itemized_share<.
		// Restrict to filers first, then keep one record per filing unit;
		// in the other order the record kept could be a non-filer and the
		// whole unit would then be dropped.
		keep if inlist(filer_status,1,2,3,4)
		bys h_seq taxunit2: keep if _n==1
		collapse (mean) deduc_* , by(y_group)
		noi di "Deductions"
		noi list if y_group>=0, clean noobs // this is our breakdown of deductions
	restore
}		
}
STOP
// NOTE(review): STOP is not a built-in Stata command. Unless a program of
// that name is defined elsewhere, the do-file halts with an error here and
// nothing below is run - presumably a deliberate stop before unfinished code.
// -------------------------------------------------- 6. Final code up
	
	// Alpha state list
	// The state variable is called gestcen or hg_st60 depending on the file;
	// whichever exists is renamed to stcps.
	foreach oldname in gestcen hg_st60 {
		capture rename `oldname' stcps
	}

	// Crosswalk, written as "stcps soi" pairs. stcps values not listed
	// leave state missing.
	local xwalk ///
		11 20  12 30  13 46  14 22  15 40  16 7   /// ME NH VT MA RI CT
		21 33  22 31  23 39                       /// NY NJ PA
		31 36  32 15  33 14  34 23  35 50         /// OH IN IL MI WI
		41 24  42 16  43 26  44 35  45 42  46 28  47 17 /// MN IA MO ND SD NE KS
		51 8   52 21  53 9   54 47  55 49  56 34  57 41  58 11  59 10 /// DE MD DC VA WV NC SC GA FL
		61 18  62 43  63 1   64 25                /// KY TN AL MS
		71 4   72 19  73 37  74 44                /// AR LA OK TX
		81 27  82 13  83 51  84 6   85 32  86 3   87 45  88 29 /// MT ID WY CO NM AZ UT NV
		91 48  92 38  93 5   94 2   95 12         // WA OR CA AK HI

	gen state = .
	while "`xwalk'" != "" {
		gettoken cps_code xwalk : xwalk
		gettoken soi_code xwalk : xwalk
		replace state = `soi_code' if stcps == `cps_code'
	}
	label var state "[TS2] State (SOI codes)"


